package me.xuym.crawler.t66y;

import com.cbs.java.component.application.Application;
import me.xuym.crawler.Processor;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Processor that turns an HTML string into a list of {@link T66yResult} entries, one per
 * {@code <h3><a href="...">...</a></h3>} fragment found in the input.
 *
 * <p>Created by broche on 05/08/2017.
 */
public class T66yConvertor extends Processor<String, List<T66yResult>> {

    /**
     * Matches an anchor wrapped directly in an {@code <h3>} element.
     * Group 1 is the {@code href} attribute value, group 2 is the text between the anchor tags.
     * Compiled once: {@link Pattern} instances are immutable and safe to share.
     */
    private static final Pattern H3_LINK_PATTERN =
            Pattern.compile("<h3><a href=\"([^\"]*)\"[^>]*>(.*?)</a></h3>");

    public T66yConvertor(Application application) {
        super(application);
    }

    /**
     * Scans {@code input} for {@code <h3>}-wrapped anchors and collects them in document order.
     *
     * @param input the HTML text to scan; must not be {@code null}
     * @return a new mutable list with one result per match (URL from the {@code href},
     *         content from the anchor body); empty when nothing matches
     * @throws NullPointerException if {@code input} is {@code null}
     */
    @Override
    protected List<T66yResult> onProcess(String input) throws Exception {
        // '.' does not match line terminators by default, so "\n" is stripped first to let
        // an anchor body that is broken across lines still match.
        // NOTE(review): "\r" is left in place, so CRLF input may still split a match — confirm
        // whether upstream content is always LF-only.
        String contentStr = input.replace("\n", "");

        List<T66yResult> results = new ArrayList<>();
        Matcher matcher = H3_LINK_PATTERN.matcher(contentStr);
        // No groupCount() check is needed: it reports the pattern's capturing-group count
        // (always 2 here), not anything about the current match.
        while (matcher.find()) {
            T66yResult result = new T66yResult();
            result.setUrl(matcher.group(1));
            result.setContent(matcher.group(2));
            results.add(result);
        }
        return results;
    }
}
